library(Seurat)
library(scCustomize)
library(dplyr)
library(SingleCellExperiment)
library(Matrix.utils)
library(magrittr)
library(RColorBrewer)
library(BiocParallel)
library(pheatmap)
library(DESeq2)
library(msigdbr)
library(fgsea)
library(purrr)
library(stringr)
library(tibble)
library(ggplot2)
seurat_integrated <- readRDS("D:/RStudio Project/Ben_KO_antibody_mice_IL11_BD_single_cell_manucript_submission/results/processed_seurat_annotated.rds")

#Sup Figure 14A-14I

DimPlot(seurat_integrated,
        reduction = "umap.cca",
        label = TRUE,
        label.size = 3)

#Sup Figure 14B
marker_genes = (c("Bex2","Lpl","Lcn2","Il33","Cldn4","Krt8","Rtkn2","Cped1","Igfbp2","Mki67","Birc5","Ube2c",
                  "Cyp2f2","Scgb1a1","Hp","Tppp3","Dynlrb2","Mgp"))

Idents(seurat_integrated) <- factor(x = Idents(seurat_integrated), 
                                    levels = c("AT2","ActivatedAT2","Krt8ADI","AT1Immature","AT1Mature", 
                                               "Proliferating","Club","Ciliated","Stromal"))
p1 = DotPlot(seurat_integrated, features = marker_genes ,cluster.idents = F) + theme(axis.text.x = element_text(angle = 45, hjust=1))
p1

#Sup Figure 14C
seurat_integrated_AT <- readRDS("D:/RStudio Project/Ben_KO_antibody_mice_IL11_BD_single_cell_manucript_submission/results/processed_seurat_AT_annotated.rds")
seurat_integrated_AT_blm= subset(x=seurat_integrated_AT, 
                                 subset= (orig.ident == "WTblmCre" |
                                            orig.ident == "KOblmCre"))

VlnPlot_scCustom(seurat_integrated_AT_blm, features = c("Il11ra1"), split.by = "orig.ident",
                 idents = c("AT2","ActivatedAT2","Krt8ADI"))

#Sup Figure 14D
DimPlot(seurat_integrated_AT,
        reduction = "umap.cca",
        label = TRUE,
        label.size = 3,
        repel = T)

#Sup Figure 14E
seurat_integrated_AT_blm$orig.ident <- factor(x = seurat_integrated_AT_blm$orig.ident, 
                                              levels = c("WTblmCre","KOblmCre"))
FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = c("Krt8"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = F,
            pt.size = 1,
            split.by = "orig.ident",combine = F)
## [[1]]

## 
## [[2]]

FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = c("Cldn4"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = F,
            pt.size = 1,
            split.by = "orig.ident",combine = F)
## [[1]]

## 
## [[2]]

#Figure 4F-4I

#Figure 4F
DimPlot(seurat_integrated_AT,
        reduction = "umap.cca",
        label = F,
        label.size = 5,
        repel = T)

DimPlot(seurat_integrated_AT_blm,
        reduction = "umap.cca",
        label = F,
        label.size = 5,
        repel = T,
        split.by = "orig.ident")

#Figure 4G
n_cells <- FetchData(seurat_integrated_AT, 
                     vars = c("ident", "sample")) %>%
  dplyr::count(ident, sample) %>%
  tidyr::spread(ident, n)

write.csv(n_cells, file = "n_cells_KO.csv")

#Figure 4H
seurat_integrated_AT_blm = AddModuleScore(seurat_integrated_AT_blm,
                                          features = list(c("Fn1", "Ccn2", "Col1a1", "Fbln2", "Sparc", "Itgb3", "Acta2", "Fbln5", "Lama3", "Mmp14", "Lox", "Ntm", "Cald1", "Nt5e", "Fstl3", "Colgalt1", "Bmp1", "Col4a1", "Cxcl2", "Tgm2", "Cd44", "Pmepa1", "Sdc1", "Igfbp3", "Plod2", "Tpm1", "Pdlim4", "Qsox1", "Col4a2", "Ccn1", "Igfbp2", "Crlf1", "Loxl2", "Tnc", "Lgals1", "Col1a2", "Serpinh1", "Sat1", "Rgs4", "Cxcl1", "Basp1", "Calu", "Serpine2", "Bdnf", "Tpm2", "Col5a3", "Cadm1", "Serpine1", "Glipr1", "Adam12", "Col5a2", "Itgav", "Flna", "Htra1", "Inhba", "Gpc1", 
                                                            "Itgb1", "Itga5", "Spp1", "Cdh6", "Anpep", "P3h1", "Vegfc", "Bgn", "Col8a2", "Igfbp4", "Plod1", "Slc6a8")),
                                          name="K8_EMTLeading_all")

FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = "K8_EMTLeading_all1", 
            order = TRUE, 
            min.cutoff = 'q10',
            label = F,
            pt.size = 1,
            split.by = "orig.ident",
            cols = c("grey", "red"),combine = F)
## [[1]]

## 
## [[2]]

#Figure 4I
FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = c("Col1a1"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = F,
            pt.size = 1,
            split.by = "orig.ident",combine = F)
## [[1]]

## 
## [[2]]

#Sup Figure 15A-15E

#Sup Figure 15A
counts_AT <- LayerData(object = seurat_integrated_AT, layer = "counts")
metadata_AT_deseq <- seurat_integrated_AT@meta.data
metadata_AT_deseq$sample=as.factor(metadata_AT_deseq$sample)
metadata_AT_deseq$cluster_id <- factor(seurat_integrated_AT@active.ident)
sce <- SingleCellExperiment(assays = list(counts = counts_AT), 
                            colData = metadata_AT_deseq)
groups <- colData(sce)[, c("cluster_id", "sample")]
assays(sce)
## List of length 1
## names(1): counts
kids <- purrr::set_names(levels(sce$cluster_id))
nk <- length(kids)
sids <- purrr::set_names(levels(sce$sample))
ns <- length(sids)
n_cells <- as.numeric(table(sce$sample))
m <- match(sids, sce$sample)
ei <- data.frame(colData(sce)[m, ], 
                 n_cells, row.names = NULL) %>% 
  dplyr::select("sample","orig.ident","n_cells")
groups <- colData(sce)[, c("cluster_id", "sample")]
pb <- aggregate.Matrix(t(counts(sce)), 
                       groupings = groups, fun = "sum") 
splitf <- sapply(stringr::str_split(rownames(pb), 
                                    pattern = "_",  
                                    n = 2), 
                 `[`, 1)

pb <- split.data.frame(pb, 
                       factor(splitf)) %>%
  lapply(function(u) 
    set_colnames(t(u),
                 sapply(stringr::str_split(rownames(u), 
                                           pattern = "_",  
                                           n = 2), 
                        `[`, 2)))
get_sample_ids <- function(x){
  pb[[x]] %>%
    colnames()
}

de_samples <- map(1:length(kids), get_sample_ids) %>%
  unlist()
samples_list <- map(1:length(kids), get_sample_ids)

get_cluster_ids <- function(x){
  rep(names(pb)[x], 
      each = length(samples_list[[x]]))
}

de_cluster_ids <- map(1:length(kids), get_cluster_ids) %>%
  unlist()

gg_df <- data.frame(cluster_id = de_cluster_ids,
                    sample = de_samples)

gg_df <- left_join(gg_df, ei[, c("sample", "orig.ident")]) 

metadata_AT_deseq <- gg_df %>%
  dplyr::select(cluster_id, sample, orig.ident) 

metadata_AT_deseq$cluster_id <- factor(metadata_AT_deseq$cluster_id)
metadata_AT_deseq$gender <- NA
metadata_AT_deseq$gender[which(str_detect(metadata_AT_deseq$sample, "^WTsalineCre_1"))] <- "male"
metadata_AT_deseq$gender[which(str_detect(metadata_AT_deseq$sample, "^KOblmCre_1"))] <- "female"
metadata_AT_deseq$gender[which(str_detect(metadata_AT_deseq$sample, "^KOblmCre_2"))] <- "female"
metadata_AT_deseq$gender[which(str_detect(metadata_AT_deseq$sample, "^KOsalineCre_1"))] <- "female"
metadata_AT_deseq$gender[which(str_detect(metadata_AT_deseq$sample, "^WTblmCre_1"))] <- "female"
metadata_AT_deseq$gender[which(str_detect(metadata_AT_deseq$sample, "^WTblmCre_2"))] <- "male"
metadata_AT_deseq$gender = as.factor(metadata_AT_deseq$gender)
clusters <- levels(metadata_AT_deseq$cluster_id)
clusters
## [1] "ActivatedAT2" "AT1Immature"  "AT1Mature"    "AT2"          "Krt8ADI"
msigdbr_M = msigdbr(species = "Mus musculus", category = "H")
pathways_M_H = split(x = msigdbr_M$gene_symbol, f = msigdbr_M$gs_name)


prop_cex <- function(values, bins, minc = 0.3, maxc = 3, na.value = 0, inf.value = 400)
{   
    values[is.na(values)] = na.value
    values[!is.finite(values)] = inf.value
    
    ordered.values <- values[order(values, decreasing = T)]

    cex.values <- seq(minc, maxc, length.out = length(unique(values)))
    cex.frame <- as.data.frame(cbind(cex.values, lfc =rev(unique(ordered.values))))
    cex.binned <- seq(minc, maxc, length.out = bins)
    outdf <- as.data.frame(values)
    outdf$cex.values = sapply(outdf$values, function (x) cex.frame[which(cex.frame$lfc == x),1])
    cex.values.binned = as.data.frame(sapply(outdf$cex.values, function(x) cut(x, breaks = cex.binned, include.lowest = T, labels = cex.binned[1:length(cex.binned)-1] )), stringsAsFactors = F)
    outdf$binned = as.numeric(levels(cex.values.binned[,1])[cex.values.binned[,1]])
    return(outdf$binned)
}

colorKey <- function(values, pal = viridis(25, option = "B"))
{
    require(pheatmap)
    values_sc <- scale(values)
    bks <- pheatmap:::generate_breaks(values_sc, length(pal), center = F)
    cols <- pheatmap:::scale_colours(values_sc, col=pal, breaks=bks, na_col = "gray")
    cols <- as.character(cols)
    return(cols)
} 

lollipop_GSEA =function(gseadf, pal = colorRampPalette(c("red", "gray", "blue"))(20), show.max = 10, toLower = F, ...)
{
  df = data.frame("NES" = gseadf$NES, "padj" = gseadf$padj, "pathway" = gseadf$pathway)
  df = df[order(df$NES, decreasing = T), ]
  
  if((show.max*2) > nrow(df))
  {
    df.sub = df
  } else {
    
    df.sub = df[c(1:show.max, (nrow(df)-(show.max-1)):nrow(df)), ]
  }
  
  df.sub$pathway = sapply(df.sub$pathway, function(x) gsub(x, pattern = "_", replacement = " "))
  if (toLower == T) df.sub$pathway = tolower(df.sub$pathway)
  bpl = rev(barplot(df.sub$NES, horiz = T, plot = F))
  plot(0,0,cex = 0, xlim=c(min(df.sub$NES)*1.10,max(df.sub$NES)*1.10), ylim = range(c(0,bpl))*1.10, xlab = "Normalized Enrichment Score", ylab = NA, yaxt = "n", ...)
  segments(x0 = rep(0, length(bpl)), y0 = bpl, x1 = df.sub$NES, y1 = bpl)
  segments(x0 = 0, x1 = 0, y0 = bpl, y1 = bpl[length(bpl)])
  df.sub$logP = -log10(df.sub$padj)
  if(length(table(df.sub$logP)) < 3) {bins = 2} else {bins = 3}
  df.sub$cex = prop_cex(df.sub$logP, bins, minc = 2, maxc = 3)
  df.sub$cex[df.sub$logP < 1.3] = 1
  df.sub$color = colorKey(df.sub$NES, pal)
  points(df.sub$NES, bpl, cex = df.sub$cex, pch = 21, bg = df.sub$color)
  
  pos_labels = df.sub$pathway[which(df.sub$NES > 0)]
  neg_labels = df.sub$pathway[which(df.sub$NES < 0)]
  text(x = 0, y = bpl, labels = c(pos_labels, neg_labels), cex = 0.6, pos = c(rep(2,length(pos_labels)), rep(4, length(neg_labels))))
  text(x = c(df.sub$NES[which(df.sub$NES > 0)] + 0.1, df.sub$NES[which(df.sub$NES < 0)] - 0.1), y = bpl, pos = c(rep(4,length(pos_labels)), rep(2, length(neg_labels))), adj = c(rep(1.5,length(pos_labels)), rep(0.5, length(neg_labels))), cex = 0.6, labels = formatC(df.sub$padj, digits = 2, format = "e"))

}

metadata_AT_deseq_loop <- metadata_AT_deseq[which(metadata_AT_deseq$cluster_id == "Krt8ADI"), ]
rownames(metadata_AT_deseq_loop) <- metadata_AT_deseq_loop$sample
counts_loop <- pb[["Krt8ADI"]]
loop_counts <- as.data.frame(as.matrix(counts_loop[, which(colnames(counts_loop) %in% 
                                                             rownames(metadata_AT_deseq_loop))]))
dds_loop <- DESeqDataSetFromMatrix(countData = loop_counts, 
                                   colData = metadata_AT_deseq_loop, 
                                   design = ~ gender + orig.ident)
rld <- rlog(dds_loop, blind=TRUE)
rld_mat <- assay(rld)
rld_cor <- cor(rld_mat)
dds_loop <- DESeq(dds_loop)
res_loop <- results(dds_loop, contrast = c("orig.ident","KOblmCre","WTblmCre"))
res_loop$gene = rownames(res_loop)
res_loop$gene[which(res_loop$gene == "" | 
                      is.na(res_loop$gene))]<- 
  rownames(res_loop)[which(res_loop$gene == ""| 
                             is.na(res_loop$gene))]

res_loop_tbl <- res_loop %>%
  data.frame() %>%
  as_tibble() %>%
  arrange(padj)

write.csv(res_loop_tbl,
          paste0("results/", "Krt8ADI", "_", levels(metadata_AT_deseq_loop$orig.ident)[1], "_vs_", levels(metadata_AT_deseq_loop$orig.ident)[3], "_all_genes.csv"),
          quote = FALSE, 
          row.names = FALSE)

res_loop = as.data.frame(res_loop)
res_loop_2 <- res_loop %>%
  dplyr::select(gene, stat) %>% 
  na.omit() %>% 
  distinct() %>% 
  group_by(gene) #%>% 
ranks_loop <- deframe(res_loop_2)

hallmark_res_loop = fgsea(pathways=pathways_M_H, stats=ranks_loop, nperm = 100000, BPPARAM = SnowParam())
write.csv(apply(hallmark_res_loop,2,as.character),
          paste0("results/", "Krt8ADI", "_", levels(metadata_AT_deseq_loop$orig.ident)[1], "_vs_", levels(metadata_AT_deseq_loop$orig.ident)[3], "_GSEA_Hallmark.csv"))

lollipop_GSEA(hallmark_res_loop, show.max = 15)

#Sup Figure 15B
seurat_integrated_AT_blm = AddModuleScore(seurat_integrated_AT_blm,
                                          features = list(c("Ltbp2", "Pmepa1", "Tgif1", "Ncor2", "Smad7", "Junb", "Ski", "Serpine1", "Smad3", "Ctnnb1", "Nog", "Tgfbr1", "Fnta", "Bmpr2", "Xiap", "Furin", "Smurf1")),
                                          name="K8_TgfbLeading_all")

FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = "K8_TgfbLeading_all1", 
            order = TRUE, 
            min.cutoff = 'q10',
            label = F,
            pt.size = 1,
            split.by = "orig.ident",
            cols = c("grey", "red"),combine = F)
## [[1]]

## 
## [[2]]

seurat_integrated_AT_blm = AddModuleScore(seurat_integrated_AT_blm,
                                          features = list(c(c("Ier3", "Mxd1", "Ddit4", "Cdkn2a", "Sdc1", "Cdkn2b", "Sfn", "Krt17", "Dram1", "Zfp36l1", "Pdgfa", "Lif", "Sat1", "Itgb4", "Ccnd2", "Pitpnc1", "Tap1", "Traf4", "Bax", "Ak1", "Cd81", "Vamp8", "Ctsf", "Ptpn14", "Abhd4", "Fgf13", "Apaf1", "Ninj1", "Trp63", "Tcn2", "Rgs16", "Ercc5", "Foxo3", "Dcxr", "Tspyl2", "Acvr1b", "Ier5", "Pidd1", "Zmat3", "Mapkapk3", "Steap3", "H2aj", "App", "Ei24", "Inhbb", "Rps27l", "Ccnd3", "Mdm2", "Btg2", "Def6", "Abcc5", "Tpd52l1", "Ip6k2", "Slc19a2", "Trp53", "Fbxw7", "Wwp1", 
                                                              "Upp1", "Tm7sf3", "Trafd1", "Btg1", "Plxnb2", "Rrad", "Nol8", "Abat", "Rps12", "Gls2", "S100a4", "Bak1", "F2r"))),
                                          name="K8_P53Leading_all")

FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = "K8_P53Leading_all1", 
            order = TRUE, 
            min.cutoff = 'q10',
            label = F,
            pt.size = 1,
            split.by = "orig.ident",
            cols = c("grey", "red"),combine = F)
## [[1]]

## 
## [[2]]

#Sup Figure 15C
for( cluster in unique(Idents(seurat_integrated_AT_blm))) {
  print(cluster)
  a <- wilcox.test( seurat_integrated_AT_blm$K8_EMTLeading_all1[ Idents(seurat_integrated_AT_blm) == cluster & seurat_integrated_AT_blm$orig.ident == 'KOblmCre'],
                    seurat_integrated_AT_blm$K8_EMTLeading_all1[ Idents(seurat_integrated_AT_blm) == cluster & seurat_integrated_AT_blm$orig.ident == 'WTblmCre'])
  print(paste0( 'Wilcox p-val: ',a$p.value))
}
## [1] "ActivatedAT2"
## [1] "Wilcox p-val: 2.90179618449096e-129"
## [1] "AT1Immature"
## [1] "Wilcox p-val: 0.0868290708296652"
## [1] "Krt8ADI"
## [1] "Wilcox p-val: 7.55609415499284e-17"
## [1] "AT1Mature"
## [1] "Wilcox p-val: 0.78772313152467"
## [1] "AT2"
## [1] "Wilcox p-val: 0.875217520145433"
options(repr.plot.width = 10, repr.plot.height = 11, repr.plot.res = 100)
p2 <- VlnPlot(seurat_integrated_AT_blm, 'K8_EMTLeading_all1', split.by = 'orig.ident', 
              split.plot = T,pt.size =0,idents = "Krt8ADI") + xlab('') +
  theme(text = element_text(size = 15), axis.text = element_text(size = 15)) + ggtitle('') +
  annotate('text', x = 1.001, y = 1.5, label = c('p=7.5e-17'), size = 7.5)+
  ylab('EMT score module expression') + ylim(-0.5, 1.6) 

p2

#Sup Figure 15D
DimPlot(seurat_integrated_AT,
        reduction = "umap.cca",
        label = F,
        label.size = 4,
        repel = T)

FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = c("Fn1"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = F,
            pt.size = 1,
            split.by = "orig.ident",combine = F)
## [[1]]

## 
## [[2]]

FeaturePlot(seurat_integrated_AT_blm, 
            reduction = "umap.cca", 
            features = c("Ccn2"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = F,
            pt.size = 1,
            split.by = "orig.ident",combine = F)
## [[1]]

## 
## [[2]]

#Sup Figure 15E

library("slingshot")
seurat_integrated_AT_blm_remapping = FindVariableFeatures(seurat_integrated_AT_blm, 
                                                                 selection.method = "vst",
                                                                 nfeatures = 500, #500
                                                                 verbose = FALSE)

seurat_integrated_AT_blm_remapping <- ScaleData(seurat_integrated_AT_blm_remapping)
seurat_integrated_AT_blm_remapping <- RunPCA(seurat_integrated_AT_blm_remapping)

seurat_integrated_AT_blm_remapping = RunUMAP(seurat_integrated_AT_blm_remapping, dims = 1:6,
                                                    reduction =  "integrated.cca",
                                                    reduction.name = "umap.cca")

dimred_AT_blm_remapping <- seurat_integrated_AT_blm_remapping@reductions$umap.cca@cell.embeddings
clustering_AT_blm_remapping <- seurat_integrated_AT_blm_remapping@active.ident
pal <- c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"))
plot(dimred_AT_blm_remapping, col = RColorBrewer::brewer.pal(9,"Set1")[clustering_AT_blm_remapping], cex = 0.5, pch = 16)

seurat_integrated_AT_blm_remapping_KO = subset(x=seurat_integrated_AT_blm_remapping, 
                                                subset= (orig.ident == "KOblmCre"  ))
seurat_integrated_AT_blm_remapping_WT = subset(x=seurat_integrated_AT_blm_remapping, 
                                                subset= (orig.ident == "WTblmCre"  ))
dimred_AT_blm_remapping_KO <- seurat_integrated_AT_blm_remapping_KO@reductions$umap.cca@cell.embeddings
clustering_AT_blm_remapping_KO <- seurat_integrated_AT_blm_remapping_KO@active.ident

dimred_AT_blm_remapping_WT <- seurat_integrated_AT_blm_remapping_WT@reductions$umap.cca@cell.embeddings
clustering_AT_blm_remapping_WT <- seurat_integrated_AT_blm_remapping_WT@active.ident

# Run default Slingshot lineage identification
set.seed(1)
lineagesKO <- getLineages(data = dimred_AT_blm_remapping_KO, clusterLabels = clustering_AT_blm_remapping_KO,start.clus = "AT2")
lineagesKO2=as.SlingshotDataSet(lineagesKO)
lineagesKO2
## class: SlingshotDataSet 
## 
##  Samples Dimensions
##     7397          2
## 
## lineages: 1 
## Lineage1: AT2  ActivatedAT2  Krt8ADI  AT1Immature  AT1Mature  
## 
## curves: 0
curvesKO <- getCurves(lineagesKO2)
curvesKO2 = as.SlingshotDataSet(curvesKO)
curvesKO2
## class: SlingshotDataSet 
## 
##  Samples Dimensions
##     7397          2
## 
## lineages: 1 
## Lineage1: AT2  ActivatedAT2  Krt8ADI  AT1Immature  AT1Mature  
## 
## curves: 1 
## Curve1: Length: 23.552   Samples: 7397
# Run default Slingshot lineage identification
set.seed(1)
lineagesWT <- getLineages(data = dimred_AT_blm_remapping_WT, clusterLabels = clustering_AT_blm_remapping_WT,start.clus = "AT2")
lineagesWT2=as.SlingshotDataSet(lineagesWT)
lineagesWT2
## class: SlingshotDataSet 
## 
##  Samples Dimensions
##    10475          2
## 
## lineages: 2 
## Lineage1: AT2  ActivatedAT2  AT1Immature  Krt8ADI  
## Lineage2: AT2  ActivatedAT2  AT1Immature  AT1Mature  
## 
## curves: 0
curvesWT <- getCurves(lineagesWT2)
curvesWT2 = as.SlingshotDataSet(curvesWT)
curvesWT2
## class: SlingshotDataSet 
## 
##  Samples Dimensions
##    10475          2
## 
## lineages: 2 
## Lineage1: AT2  ActivatedAT2  AT1Immature  Krt8ADI  
## Lineage2: AT2  ActivatedAT2  AT1Immature  AT1Mature  
## 
## curves: 2 
## Curve1: Length: 27.7 Samples: 9419.84
## Curve2: Length: 24.909   Samples: 8255.71
pal <- c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"))
plot(dimred_AT_blm_remapping_WT, col = RColorBrewer::brewer.pal(9,"Set1")[clustering_AT_blm_remapping_WT], cex = 0.5, pch = 16)
lines(curvesWT2, lwd = 3, col = 'black')

plot(dimred_AT_blm_remapping_KO, col = RColorBrewer::brewer.pal(9,"Set1")[clustering_AT_blm_remapping_KO], cex = 0.5, pch = 16)
lines(curvesKO2, lwd = 3, col = 'black')

#Figure 5G

seurat_integrated_antibody_AT <- readRDS("D:/RStudio Project/Ben_KO_antibody_mice_IL11_BD_single_cell_manucript_submission/results/processed_seurat_antibody_AT_annotated.rds")
DimPlot(seurat_integrated_antibody_AT,
        reduction = "umap")

DimPlot(seurat_integrated_antibody_AT, pt.size = 1, group.by = "sample", cols = c("purple","green","grey"))

#Figure 5H
n_cells_antibody <- FetchData(seurat_integrated_antibody_AT, 
                     vars = c("ident", "sample")) %>%
  dplyr::count(ident, sample) %>%
  tidyr::spread(ident, n)

write.csv(n_cells_antibody, file = "n_cells_antibody.csv")

#Figure 5I
library(data.table)
library(reshape)
seurat_integrated_antibody_AT_blm= subset(x=seurat_integrated_antibody_AT, 
                                 subset= (orig.ident == "BlmIgGTomato" |
                                            orig.ident == "BlmAntiIL11Tomato"))
seurat_integrated_antibody_AT_blmIgG = subset(x=seurat_integrated_antibody_AT, 
                                     subset= (orig.ident == "BlmIgGTomato"))
seurat_integrated_antibody_AT_blmIL11 = subset(x=seurat_integrated_antibody_AT, 
                                      subset= (orig.ident == "BlmAntiIL11Tomato"))

deseq_pseudobulk_blmIgG_metadata =seurat_integrated_antibody_AT_blmIgG@meta.data
set.seed(30)
rows <- sample(nrow(deseq_pseudobulk_blmIgG_metadata))
deseq_pseudobulk_blmIgG_metadata_randomized <- deseq_pseudobulk_blmIgG_metadata[rows, ]
deseq_pseudobulk_blmIgG_metadata_randomized = split(deseq_pseudobulk_blmIgG_metadata_randomized,f = 1:2,sep=".")
deseq_pseudobulk_blmIgG_metadata_randomized$"1"$pseudosample ="BlmIgGTomato_1_pseudo1"
deseq_pseudobulk_blmIgG_metadata_randomized$"2"$pseudosample ="BlmIgGTomato_1_pseudo2"
deseq_pseudobulk_blmIgG_metadata_randomized =rbindlist(deseq_pseudobulk_blmIgG_metadata_randomized)
deseq_pseudobulk_blmIgG_metadata_randomized = column_to_rownames(deseq_pseudobulk_blmIgG_metadata_randomized,var ="cells")
deseq_pseudobulk_blmIgG_metadata_randomized$cells =rownames(deseq_pseudobulk_blmIgG_metadata_randomized)
deseq_pseudobulk_blmIgG_metadata_randomized =sort_df(deseq_pseudobulk_blmIgG_metadata_randomized,vars = "cells")

deseq_pseudobulk_blmIL11_metadata =seurat_integrated_antibody_AT_blmIL11@meta.data
set.seed(30)
rows <- sample(nrow(deseq_pseudobulk_blmIL11_metadata))
deseq_pseudobulk_blmIL11_metadata_randomized <- deseq_pseudobulk_blmIL11_metadata[rows, ]
deseq_pseudobulk_blmIL11_metadata_randomized = split(deseq_pseudobulk_blmIL11_metadata_randomized,f = 1:2,sep=".")
deseq_pseudobulk_blmIL11_metadata_randomized$"1"$pseudosample ="BlmIL11Tomato_1_pseudo1"
deseq_pseudobulk_blmIL11_metadata_randomized$"2"$pseudosample ="BlmIL11Tomato_1_pseudo2"
deseq_pseudobulk_blmIL11_metadata_randomized =rbindlist(deseq_pseudobulk_blmIL11_metadata_randomized)
deseq_pseudobulk_blmIL11_metadata_randomized = column_to_rownames(deseq_pseudobulk_blmIL11_metadata_randomized,var ="cells")
deseq_pseudobulk_blmIL11_metadata_randomized$cells =rownames(deseq_pseudobulk_blmIL11_metadata_randomized)
deseq_pseudobulk_blmIL11_metadata_randomized =sort_df(deseq_pseudobulk_blmIL11_metadata_randomized,vars = "cells")

deseq_pseudobulk_blm_metadata_randomized2 = rbind(deseq_pseudobulk_blmIgG_metadata_randomized,
                                                  deseq_pseudobulk_blmIL11_metadata_randomized) %>%
  dplyr::select(cells,pseudosample)

metadata_antibody_AT_deseq <- seurat_integrated_antibody_AT_blm@meta.data
metadata_antibody_AT_deseq2 = left_join(metadata_antibody_AT_deseq,deseq_pseudobulk_blm_metadata_randomized2,
                                        by = "cells")

seurat_integrated_antibody_AT_blm2 = seurat_integrated_antibody_AT_blm
seurat_integrated_antibody_AT_blm2@meta.data = metadata_antibody_AT_deseq2

counts_antibody_AT <- LayerData(object = seurat_integrated_antibody_AT_blm2, layer = "counts")
metadata_antibody_AT_deseq <- seurat_integrated_antibody_AT_blm2@meta.data
metadata_antibody_AT_deseq$pseudosample=as.factor(metadata_antibody_AT_deseq$pseudosample)
metadata_antibody_AT_deseq$cluster_id <- factor(seurat_integrated_antibody_AT_blm2@active.ident)
sce <- SingleCellExperiment(assays = list(counts = counts_antibody_AT), 
                            colData = metadata_antibody_AT_deseq)
groups <- colData(sce)[, c("cluster_id", "pseudosample")]
assays(sce)
## List of length 1
## names(1): counts
kids <- purrr::set_names(levels(sce$cluster_id))
nk <- length(kids)
sids <- purrr::set_names(levels(sce$pseudosample))
ns
## [1] 6
n_cells <- as.numeric(table(sce$pseudosample))
m <- match(sids, sce$pseudosample)
ei <- data.frame(colData(sce)[m, ], 
                 n_cells, row.names = NULL) %>% 
  dplyr::select("pseudosample","orig.ident","n_cells")
groups <- colData(sce)[, c("cluster_id", "pseudosample")]
pb <- aggregate.Matrix(t(counts(sce)), 
                       groupings = groups, fun = "sum") 
splitf <- sapply(stringr::str_split(rownames(pb), 
                                    pattern = "_",  
                                    n = 2), 
                 `[`, 1)

pb <- split.data.frame(pb, 
                       factor(splitf)) %>%
  lapply(function(u) 
    set_colnames(t(u),
                 sapply(stringr::str_split(rownames(u), 
                                           pattern = "_",  
                                           n = 2), 
                        `[`, 2)))
get_sample_ids <- function(x){
  pb[[x]] %>%
    colnames()
}

de_samples <- map(1:length(kids), get_sample_ids) %>%
  unlist()
samples_list <- map(1:length(kids), get_sample_ids)

get_cluster_ids <- function(x){
  rep(names(pb)[x], 
      each = length(samples_list[[x]]))
}

de_cluster_ids <- map(1:length(kids), get_cluster_ids) %>%
  unlist()

gg_df <- data.frame(cluster_id = de_cluster_ids,
                    pseudosample = de_samples)

gg_df <- left_join(gg_df, ei[, c("pseudosample", "orig.ident")]) 

metadata_antibody_AT_deseq <- gg_df %>%
  dplyr::select(cluster_id, pseudosample, orig.ident) 

metadata_antibody_AT_deseq$cluster_id <- factor(metadata_antibody_AT_deseq$cluster_id)
clusters <- levels(metadata_antibody_AT_deseq$cluster_id)
clusters
## [1] "ActivatedAT2" "AT1Immature"  "AT1Mature"    "AT2"          "Krt8ADI"
metadata_antibody_AT_deseq_loop <- metadata_antibody_AT_deseq[which(metadata_antibody_AT_deseq$cluster_id == "Krt8ADI"), ]
rownames(metadata_antibody_AT_deseq_loop) <- metadata_antibody_AT_deseq_loop$pseudosample
counts_loop <- pb[["Krt8ADI"]]
loop_counts <- as.data.frame(as.matrix(counts_loop[, which(colnames(counts_loop) %in% 
                                                             rownames(metadata_antibody_AT_deseq_loop))]))
dds_loop <- DESeqDataSetFromMatrix(countData = loop_counts, 
                                   colData = metadata_antibody_AT_deseq_loop, 
                                   design = ~ orig.ident)
rld <- rlog(dds_loop, blind=TRUE)
rld_mat <- assay(rld)
rld_cor <- cor(rld_mat)
dds_loop <- DESeq(dds_loop)
res_loop <- results(dds_loop, contrast = c("orig.ident","BlmAntiIL11Tomato","BlmIgGTomato"))
res_loop$gene = rownames(res_loop)
res_loop$gene[which(res_loop$gene == "" | 
                      is.na(res_loop$gene))]<- 
  rownames(res_loop)[which(res_loop$gene == ""| 
                             is.na(res_loop$gene))]

res_loop_tbl <- res_loop %>%
  data.frame() %>%
  as_tibble() %>%
  arrange(padj)

write.csv(res_loop_tbl,
          paste0("results/", "Krt8ADI", "_", levels(metadata_antibody_AT_deseq_loop$orig.ident)[1], "_vs_", levels(metadata_antibody_AT_deseq_loop$orig.ident)[3], "antibody_all_genes.csv"),
          quote = FALSE, 
          row.names = FALSE)

res_loop = as.data.frame(res_loop)
res_loop_2 <- res_loop %>%
  dplyr::select(gene, stat) %>% 
  na.omit() %>% 
  distinct() %>% 
  group_by(gene) #%>% 
ranks_loop <- deframe(res_loop_2)

hallmark_res_loop = fgsea(pathways=pathways_M_H, stats=ranks_loop, nperm = 100000, BPPARAM = SnowParam())
write.csv(apply(hallmark_res_loop,2,as.character),
          paste0("results/", "Krt8ADI", "_", levels(metadata_antibody_AT_deseq_loop$orig.ident)[1], "_vs_", levels(metadata_antibody_AT_deseq_loop$orig.ident)[3], "antibody_GSEA_Hallmark.csv"))

lollipop_GSEA(hallmark_res_loop, show.max = 15)

#Sup Figure 21A-21C

seurat_integrated_antibody <- readRDS("D:/RStudio Project/Ben_KO_antibody_mice_IL11_BD_single_cell_manucript_submission/results/processed_seurat_antibody.rds")
#Sup Figure 21A
DimPlot(seurat_integrated_antibody,label = T, repel = T)

#Sup Figure 21B
seurat_integrated_antibody_AT_blm$orig.ident <- factor(x = seurat_integrated_antibody_AT_blm$orig.ident, 
                                              levels = c("BlmIgGTomato","BlmAntiIL11Tomato"))
seurat_integrated_antibody_AT_blm= AddModuleScore(seurat_integrated_antibody_AT_blm,
                                          features = list(c("Herpud1", "Eif4a1", "Slc7a5", "Wipi1", "H2ax", "Stc2", "Wfs1", "Atf4", "Nolc1", "Iars", "Ttc37", "Tatdn2", "Psat1", "Ddx10", "Ssr1", "Hspa5", "Chac1", "Atf6", "Pdia5", "Eif2s1", "Xpot", "Eif4a3", "Dcp1a", "Shc1", "Exosc10", "Eif4ebp1", "Ywhaz", "Preb", "Npm1", "Calr", "Xbp1", "Mtrex", "Srpr", "Asns", "Sec31a", "Eif2ak3", "Lsm1", "Tubb2a", "Exosc9", "Ern1", "Tars", "Dnajb9", "Gemin4", "Ero1a", "Eif4e")),
                                          name="K8_UPRLeading_all")

FeaturePlot(seurat_integrated_antibody_AT_blm, 
            reduction = "umap", 
            features = "K8_UPRLeading_all1", 
            order = TRUE, 
            min.cutoff = 'q10',
            label = F,
            pt.size = 1,
            split.by = "orig.ident",
            cols = c("grey", "red"),combine = F)
## [[1]]

## 
## [[2]]

seurat_integrated_antibody_AT_blm = AddModuleScore(seurat_integrated_antibody_AT_blm,
                                          features = list(c("Slc20a1", "Cdkn1c", "Ube2d3", "Bcar3", "Fkbp1a", "Tgfbr1", "Smad1", "Ppp1r15a", "Hipk2", "Skil", "Cdk9", "Pmepa1", "Smad3", "Ltbp2", "Rab31", "Tjp1", "Furin", "Ifngr2", "Klf10", "Cdh1", "Smurf2", "Id3", "Trim33", "Wwtr1", "Hdac1")),
                                          name="K8_TgfbLeading_all")

FeaturePlot(seurat_integrated_antibody_AT_blm, 
            reduction = "umap", 
            features = "K8_TgfbLeading_all1", 
            order = TRUE, 
            min.cutoff = 'q10',
            label = F,
            pt.size = 1,
            split.by = "orig.ident",
            cols = c("grey", "red"),combine = F)
## [[1]]

## 
## [[2]]

seurat_integrated_antibody_AT_blm = AddModuleScore(seurat_integrated_antibody_AT_blm,
                                          features = list(c("Lamc2", "Plaur", "Ecm1", "Nt5e", "Tpm2", "Fstl3", "Glipr1", "Tnfrsf12a", "Itgav", "Cald1", "Dcn", "Pvr", "Tnc", "Pmp22", "Basp1", "Fbln2", "Tgm2", "Qsox1", "Fermt2", "Ccn2", "Calu", "Flna", "Tpm4", "Tagln", "Cxcl5", "Capg", "Itgb1", "Plod1", "Inhba", "Vegfc", "Pdlim4", "Col6a2", "Pmepa1", "Notch2", "Cxcl3", "Jun", "Sat1", "Ccn1")),
                                          name="K8_EMTLeading_all")

FeaturePlot(seurat_integrated_antibody_AT_blm, 
            reduction = "umap", 
            features = "K8_EMTLeading_all1", 
            order = TRUE, 
            min.cutoff = 'q10',
            label = F,
            pt.size = 1,
            split.by = "orig.ident",
            cols = c("grey", "red"),combine = F)
## [[1]]

## 
## [[2]]

#Sup Figure 21C
for( cluster in unique(Idents(seurat_integrated_antibody_AT_blm))) {
  print(cluster)
  a <- wilcox.test( seurat_integrated_antibody_AT_blm$K8_EMTLeading_all1[ Idents(seurat_integrated_antibody_AT_blm) == cluster & seurat_integrated_antibody_AT_blm$orig.ident == 'BlmAntiIL11Tomato'],
                    seurat_integrated_antibody_AT_blm$K8_EMTLeading_all1[ Idents(seurat_integrated_antibody_AT_blm) == cluster & seurat_integrated_antibody_AT_blm$orig.ident == 'BlmIgGTomato'])
  print(paste0( 'Wilcox p-val: ',a$p.value))
}
## [1] "ActivatedAT2"
## [1] "Wilcox p-val: 3.93662642573117e-27"
## [1] "AT2"
## [1] "Wilcox p-val: 0.640174751857505"
## [1] "AT1Immature"
## [1] "Wilcox p-val: 0.335616204492432"
## [1] "Krt8ADI"
## [1] "Wilcox p-val: 4.32520265641476e-07"
## [1] "AT1Mature"
## [1] "Wilcox p-val: 0.00678733031674208"
p3 <- VlnPlot(seurat_integrated_antibody_AT_blm, 'K8_EMTLeading_all1', split.by = 'orig.ident', 
              split.plot = T,pt.size =0,idents = "Krt8ADI") + xlab('') +
  theme(text = element_text(size = 15), axis.text = element_text(size = 15)) + ggtitle('') +
  annotate('text', x = 1.001, y = 1.1, label = c('p=4.3e-07'), size = 7.5)+
    ylab('EMT score module expression') + ylim(-.6, 1.1) 
p3